# Load packages
library(here)
library(dplyr)
library(readr)
library(rstan)
library(bayesrules)
library(tidyverse)
library(bayesplot)
library(rstanarm)
library(janitor)
library(tidybayes)
library(broom.mixed)
library(here)
library(sf)
library(tidycensus)
library(openxlsx)
library(s2)
# themes
theme_set(theme_minimal())
vari_names <- read_csv(here("clean_data", "nyc_names.csv"))
nyc_clean <- st_read(here("clean_data", "nyc_data.shp"), crs = 4269) 
## Reading layer `nyc_data' from data source 
##   `/Users/freddy/Documents/GitHub/454/clean_data/nyc_data.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 223 features and 24 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.04731 ymin: 40.55685 xmax: -73.70002 ymax: 40.91758
## Geodetic CRS:  NAD83
colnames(nyc_clean) <- colnames(vari_names)


library(openxlsx)
nta_to_census <- openxlsx::read.xlsx(here("ethnic", "Data", "census_to_nta.xlsx")) %>%
  dplyr::select(BoroName, NTACode) %>%
  rename(borough = BoroName,
         nta_id = NTACode) %>%
  unique()

nyc_clean <- nyc_clean %>%
  merge(., nta_to_census, by="nta_id") %>%
  mutate(transportation_desert_4cat = factor(transportation_desert_4cat, levels=c("No Access", "Limited Access", "Satisfactory Access", "Excellent Access")))
subway_stations <- st_read(here("ethnic","Data","stations", "geo_export_85568705-efba-4456-bdc0-3d70ff2cf8e5.shp")) %>%
  st_transform(., 4269)
## Reading layer `geo_export_85568705-efba-4456-bdc0-3d70ff2cf8e5' from data source 
##   `/Users/freddy/Documents/GitHub/454/ethnic/Data/stations/geo_export_85568705-efba-4456-bdc0-3d70ff2cf8e5.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 473 features and 5 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.03088 ymin: 40.57603 xmax: -73.7554 ymax: 40.90313
## Geodetic CRS:  WGS84(DD)
bus_stations <- st_read(here("ethnic","Data","bus", "bus_stops_nyc_may2020.shp")) %>%
  st_transform(., 4269)
## Reading layer `bus_stops_nyc_may2020' from data source 
##   `/Users/freddy/Documents/GitHub/454/ethnic/Data/bus/bus_stops_nyc_may2020.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 14663 features and 6 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 914169.1 ymin: 122626.8 xmax: 1066982 ymax: 271696.8
## Projected CRS: NAD83 / New York Long Island (ftUS)
transit_points <- read_csv(here("transit","ridership_points.csv"))%>%
  separate(Position, into=c("Point", "longitude", "latitude"), " ") %>%
  mutate(latitude = str_remove_all(latitude, "[)]"),
         longitude = str_remove_all(longitude, "[()]"),
         ) %>%
  dplyr::select(-c(Point)) %>% 
  mutate(latitude = as.numeric(latitude),
         longitude = as.numeric(longitude)) %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4269)
#plot locations over map
subway_loc <- ggplot() +
  geom_sf(data = nyc_clean, fill = "#EBF6FF", color = "#D48DD8", size = 0.15, alpha = .8) +
  geom_sf(data = subway_stations, color="#3F123C", size=1) + 
  coord_sf(datum = st_crs(subway_stations)) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Subway Stop Locations \nin NYC")+ 
    theme(#panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 30, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

bus_loc <- ggplot() +
  geom_sf(data = nyc_clean, fill = "#EBF6FF", color = "#D48DD8", size = 0.15, alpha = .8) +
  geom_sf(data = bus_stations, color="#3F123C", size=.5, alpha=.5) + 
  coord_sf(datum = st_crs(subway_stations)) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Bus Stop Locations \nin NYC")+ 
    theme(#panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 30, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))


stops <- nyc_clean %>%
  ggplot() +
  geom_sf(aes(fill = sub_count), color = "#8f98aa") +
  scale_fill_gradient(low= "lavender", high = "maroon",
                      guide = guide_legend(title = "Number of Subway Stops") ,na.value="#D6D6D6") +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Subway Stop Counts \nin NYC")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 30, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

bus_stops <- nyc_clean %>%
  ggplot() +
  geom_sf(aes(fill = bus_count), color = "#8f98aa") +
  scale_fill_gradient(low= "lavender", high = "maroon",
                      guide = guide_legend(title = "Number of Bus Stops") ,na.value="#D6D6D6") +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Bus Stop Counts \nin NYC")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 30, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))


ridership <- nyc_clean%>%
  ggplot() +
  geom_sf(aes(fill = log2(mean_ridership)), color = "#8f98aa") +
  scale_fill_gradient(low= "lavender", high = "maroon",
                      guide = guide_legend(title = "Log2 Mean Ridership") ,na.value="#D6D6D6") +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Mean (Log2) Subway Turnstile \nRidership in 2018 \nfor NYC")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 30, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

access <- nyc_clean %>%
  ggplot() +
  geom_sf(aes(fill = transportation_desert_4cat), color = "#8f98aa") +
  scale_fill_manual(values=c("#a45371","#e5b6c7","#ebebf7","#89a2d1"),
                       guide = guide_legend(title = "Subway Accessibility Category"), na.value="#D6D6D6") +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Subway Deserts \nin NYC")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 30, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

`

red <- ggplot(nyc_clean) +
  geom_sf(aes(fill = below_poverty_line_count), color = "#8f98aa") +
  scale_fill_gradient(low = "#FCF5EE", high = "#E13728", guide = guide_legend(title = "Number Below \nPoverty Line")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Impoverishement")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 26, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

yellow <- ggplot(nyc_clean) +
  geom_sf(aes(fill = mean_income), color = "#8f98aa") +
  scale_fill_gradient(low = "#FCF5EE", high = "#F3D24E", guide = guide_legend(title = "Mean Income")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Mean Income")+
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 26, face = "bold"),
          legend.title = element_text(size = 12),
          legend.text = element_text(size = 12)) +
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

teal <- ggplot(nyc_clean) +
  geom_sf(aes(fill = mean_rent), color = "#8f98aa") +
  scale_fill_gradient(low = "#FCF5EE", high = "#2DBDC7", guide = guide_legend(title = "Dollars")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Mean Rent")+
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 26, face = "bold"),
          legend.title = element_text(size = 12),
          legend.text = element_text(size = 12)) +
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

purple <- ggplot(nyc_clean) +
  geom_sf(aes(fill = eviction_count), color = "#8f98aa")+
  scale_fill_gradient(low = "#FCF5EE", high = "#7826C0", guide = guide_legend(title = "Number of Evictions")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Evictions")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 26, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

orange <- ggplot(nyc_clean) +
  geom_sf(aes(fill = unemployment_count), color = "#8f98aa")+
  scale_fill_gradient(low = "#FCF5EE", high = "#FC9228", guide = guide_legend(title = "Number on \nUnemployment")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Unemployment")+
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 26, face = "bold"),
          legend.title = element_text(size = 12),
          legend.text = element_text(size = 12)) +
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

green <- ggplot(nyc_clean) +
  geom_sf(aes(fill = store_count), color = "#8f98aa")+
  scale_fill_gradient(low = "#FCF5EE", high = "#326902", guide = guide_legend(title = "Number of Stores")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Retail Food Stores")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 26, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

blue <- ggplot(nyc_clean) +
  geom_sf(aes(fill = school_count), color = "#8f98aa")+
  scale_fill_gradient(low = "#FCF5EE", high = "#5372C4", 
                      guide = guide_legend(title = "Number of Schools")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Number of Schools")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 26, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

pink <- ggplot(nyc_clean) +
  geom_sf(aes(fill = total_pop), color = "#8f98aa")+
  scale_fill_gradient(low = "#FCF5EE", high = "#F450E1", guide = guide_legend(title = "Number of People")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Population")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 26, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))


brown <- ggplot(nyc_clean) +
  geom_sf(aes(fill = uninsured_count), color = "#8f98aa")+
  scale_fill_gradient(low = "#F8E3DD", high = "#6A4D39", guide = guide_legend(title = "Number of People \n without Insurance Coverage")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Insurance Coverage")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 26, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))
white <- ggplot(nyc_clean) +
  geom_sf(aes(fill = white_count), color = "#8f98aa") +
  scale_fill_gradient(low = "#FCF5EE", high = "#7B435B", guide = guide_legend(title = "Number White")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("White Population")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 24, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

black <- ggplot(nyc_clean) +
  geom_sf(aes(fill = black_count), color = "#8f98aa") +
  scale_fill_gradient(low = "#FCF5EE", high = "#F25F5C", guide = guide_legend(title = "Number Black")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Black Population")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 24, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

asian <- ggplot(nyc_clean) +
  geom_sf(aes(fill = asian_count), color = "#8f98aa") +
  scale_fill_gradient(low = "#FCF5EE", high = "#717EC3", guide = guide_legend(title = "Number Asian")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Asian Population")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 24, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

latinx <- ggplot(nyc_clean) +
  geom_sf(aes(fill = latinx_count), color = "#8f98aa")+
  scale_fill_gradient(low = "#FCF5EE", high = "#FC9A38", guide = guide_legend(title = "Number Latinx")) +
  theme_minimal() +
  theme(panel.grid.major = element_line("transparent"),
        axis.text = element_blank()) +
  ggtitle("Latinx Population")+ 
    theme(panel.grid.major = element_line("transparent"),
          plot.title = element_text(size = 24, face = "bold"),
          legend.title = element_text(size = 12), 
          legend.text = element_text(size = 12)) + 
    guides(shape = guide_legend(override.aes = list(size = 8)),
           color = guide_legend(override.aes = list(size = 8)))

Data Introduction

All the data used in this project are from two major sources: the Tidycensus package and NYC Open Data.

Tidycensus is an R package interface, developed by Kyle Walker and Matt Herman, that enables easy access to the US Census Bureau’s data APIs and returns Tidyverse-ready data frames from various major US Census Bureau datasets. Our demographic and socioeconomic data are drawn from the American Community Survey results found in Tidycensus package. A summary of our ACS data variables is below:

  • borough: Each Neighborhood’s Borough.
  • total_pop: Total Population by Neighborhood
  • mean_income: Mean Income by Neighborhood
  • below_poverty_line_count: Number of People Living Below the 100% Poverty Line by Neighborhood
  • mean_rent: Mean Rent by Neighborhood
  • unemployment_count: Number of People on Unemployment by Neighborhood
  • latinx_count: Number of Latinx People by Neighborhood
  • white_count: Number of White People by Neighborhood
  • black_count: Number of Black People by Neighborhood
  • native_count: Number of Native People by Neighborhood
  • asian_count: Number of Asian People by Neighborhood
  • naturalized_citizen_count: Number of Naturalized Citizens by Neighborhood
  • noncitizen_count: Number of Foreign Born People by Neighborhood
  • uninsured_count: Number of Uninsured Citizens of any Age by Neighborhood

For remaining predictors, we used NYC Open Data’s portal to identify specific predictors. In particular, we used geotagged locations of Subway Stops, Bus Stops, Grocery Stores, Schools, and Eviction Sites from the Departments of Transportation, Health, Education, and Housing to calculate neighborhood-specific variables described below:

  • school_count: Number of Public Schools by Neighborhood
  • eviction_count: Number of Evictions by Neighborhood
  • store_count: Number of Grocery Stores and Food Vendors by Neighborhood
  • sub_count: Number of Subway Stations by Neighborhood
  • bus_count: Number of Bus Stations by Neighborhood
  • perc_covered_by_transit: Percent of Neighborhood Within Walking Distance (.25 miles) of Any Subway Stop.
  • transportation_desert_4cat: Subway Accessibility by Neighborhood (None, Limited, Satisfactory, Excellent)

Lastly, we acquired subway ridership from Metropolitan Transportation Authority’s turnstile data for the week of September 7, 2019. For each station, entry/exit of each turnstile is recorded. Then, we aggregated this information by taking the station-specific average of subway ridership across the 7 days in the week. Finally, we geotagged each listed station, then took the mean of ridership at all subway stations in each neighborhood to create.

  • mean_ridership: Mean Subway Ridership by Neighborhood for the week of September 7th.

Data Summaries

Our data has 224 observations of 26 variables. Below is a preview of our dataset with colnames attached.

library(kableExtra)
kable(head(nyc_clean)) %>% kable_styling() 
nta_id total_pop mean_income below_poverty_line_count below_poverty_line_and_50_count mean_rent unemployment_count latinx_count white_count black_count native_count asian_count naturalized_citizen_count noncitizen_count uninsured_count school_count eviction_count store_count sub_count bus_count mean_ridership perc_covered_by_transit transportation_desert_3cat transportation_desert_4cat borough geometry
BK0101 26308 98338.67 2397 1289 2062.667 582 3284 20526 482 40 1052 3777 3129 1797 6 68 71 2 53 9410.500 28.36395 Satisfactory Access Limited Access Brooklyn MULTIPOLYGON (((-73.94074 4…
BK0102 57774 101238.92 9120 4474 2330.077 1710 18227 32237 1460 0 4008 6802 7746 3725 12 204 129 2 97 26603.000 55.26492 Satisfactory Access Satisfactory Access Brooklyn MULTIPOLYGON (((-73.96355 4…
BK0103 36891 30309.25 18285 5970 1194.875 457 3351 31799 1288 20 194 3548 1012 711 6 45 58 3 35 6348.667 87.71022 Satisfactory Access Satisfactory Access Brooklyn MULTIPOLYGON (((-73.96762 4…
BK0104 41861 78746.25 8406 3876 1801.667 1678 13602 17682 3960 210 5515 5770 5325 3040 15 157 117 6 77 8006.000 63.00854 Satisfactory Access Satisfactory Access Brooklyn MULTIPOLYGON (((-73.95083 4…
BK0201 23758 140543.00 1504 585 2275.833 579 1517 17643 1330 44 2041 2082 1448 852 1 25 30 2 18 5275.000 98.46067 Satisfactory Access Satisfactory Access Brooklyn MULTIPOLYGON (((-73.99066 4…
BK0202 24603 132850.00 3776 970 2413.875 1195 3772 13288 3369 0 2893 2151 2728 1244 17 111 73 8 104 19444.000 159.13596 Excellent Access Excellent Access Brooklyn MULTIPOLYGON (((-73.99327 4…

Below is a numeric summary of each variable’s distribution.

summary(nyc_clean)
##     nta_id            total_pop      mean_income     below_poverty_line_count
##  Length:224         Min.   :    0   Min.   : 23149   Min.   :    0           
##  Class :character   1st Qu.:18180   1st Qu.: 50265   1st Qu.: 1555           
##  Mode  :character   Median :31624   Median : 67241   Median : 4216           
##                     Mean   :32323   Mean   : 71947   Mean   : 5905           
##                     3rd Qu.:47176   3rd Qu.: 86565   3rd Qu.: 8774           
##                     Max.   :97786   Max.   :211822   Max.   :28755           
##                                     NA's   :42                               
##  below_poverty_line_and_50_count   mean_rent    unemployment_count
##  Min.   :    0.0                 Min.   : 792   Min.   :   0.0    
##  1st Qu.:  849.2                 1st Qu.:1319   1st Qu.: 424.2    
##  Median : 2681.0                 Median :1509   Median : 918.5    
##  Mean   : 3088.9                 Mean   :1601   Mean   :1083.5    
##  3rd Qu.: 4645.5                 3rd Qu.:1744   3rd Qu.:1528.5    
##  Max.   :13934.0                 Max.   :3279   Max.   :4697.0    
##                                  NA's   :42                       
##   latinx_count    white_count     black_count       native_count  
##  Min.   :    0   Min.   :    0   Min.   :    0.0   Min.   :  0.0  
##  1st Qu.: 1976   1st Qu.:  482   1st Qu.:  471.5   1st Qu.:  0.0  
##  Median : 5575   Median : 4960   Median : 2013.0   Median : 26.0  
##  Mean   : 9687   Mean   : 9846   Mean   : 7264.6   Mean   : 56.5  
##  3rd Qu.:14059   3rd Qu.:14643   3rd Qu.: 9128.2   3rd Qu.: 72.0  
##  Max.   :60712   Max.   :69259   Max.   :69697.0   Max.   :601.0  
##                                                                   
##   asian_count      naturalized_citizen_count noncitizen_count uninsured_count  
##  Min.   :    0.0   Min.   :    0             Min.   :    0    Min.   :    0.0  
##  1st Qu.:  267.2   1st Qu.: 2663             1st Qu.: 1555    1st Qu.:  707.8  
##  Median : 2259.5   Median : 6408             Median : 4622    Median : 1909.5  
##  Mean   : 4514.7   Mean   : 6921             Mean   : 5234    Mean   : 2449.1  
##  3rd Qu.: 6020.8   3rd Qu.: 9750             3rd Qu.: 7432    3rd Qu.: 3360.0  
##  Max.   :41314.0   Max.   :31847             Max.   :25008    Max.   :12182.0  
##                                                                                
##   school_count    eviction_count    store_count       sub_count     
##  Min.   : 1.000   Min.   :   1.0   Min.   :  1.00   Min.   : 1.000  
##  1st Qu.: 2.000   1st Qu.:  49.0   1st Qu.: 17.00   1st Qu.: 1.000  
##  Median : 5.000   Median : 148.0   Median : 45.00   Median : 1.000  
##  Mean   : 6.915   Mean   : 238.2   Mean   : 51.99   Mean   : 2.406  
##  3rd Qu.: 9.250   3rd Qu.: 370.5   3rd Qu.: 76.25   3rd Qu.: 3.000  
##  Max.   :31.000   Max.   :1126.0   Max.   :202.00   Max.   :17.000  
##                                                                     
##    bus_count      mean_ridership   perc_covered_by_transit
##  Min.   :  1.00   Min.   :   273   Min.   :  0.00000      
##  1st Qu.: 30.00   1st Qu.:  5027   1st Qu.:  0.01254      
##  Median : 49.00   Median :  7964   Median : 34.01486      
##  Mean   : 52.73   Mean   : 12007   Mean   : 45.67236      
##  3rd Qu.: 68.00   3rd Qu.: 14065   3rd Qu.: 69.79902      
##  Max.   :243.00   Max.   :109922   Max.   :254.34296      
##                   NA's   :100                             
##  transportation_desert_3cat       transportation_desert_4cat   borough         
##  Length:224                 No Access          :56           Length:224        
##  Class :character           Limited Access     :79           Class :character  
##  Mode  :character           Satisfactory Access:59           Mode  :character  
##                             Excellent Access   :30                             
##                                                                                
##                                                                                
##                                                                                
##           geometry  
##  MULTIPOLYGON :224  
##  epsg:4269    :  0  
##  +proj=long...:  0  
##                     
##                     
##                     
## 
  library(table1)
table1(~ total_pop + mean_income + below_poverty_line_count+ 
         mean_rent + unemployment_count + white_count + latinx_count + black_count + 
         native_count + asian_count + uninsured_count + school_count + eviction_count +
         store_count + transportation_desert_4cat+ sub_count + bus_count + mean_ridership + perc_covered_by_transit | borough, data = nyc_clean %>% as.tibble(),  latex_options="HOLD_position") 
Bronx
(N=44)
Brooklyn
(N=64)
Manhattan
(N=39)
Queens
(N=77)
Overall
(N=224)
total_pop
Mean (SD) 30200 (18700) 37800 (24600) 38300 (24600) 25900 (20900) 32300 (22800)
Median [Min, Max] 29800 [0, 69200] 38300 [0, 97800] 35700 [0, 95300] 25000 [0, 87700] 31600 [0, 97800]
mean_income
Mean (SD) 43400 (17900) 69600 (28700) 103000 (49700) 74500 (14400) 71900 (33900)
Median [Min, Max] 38000 [23100, 94200] 61200 [27400, 148000] 108000 [33300, 212000] 72600 [37500, 104000] 67200 [23100, 212000]
Missing 8 (18.2%) 9 (14.1%) 6 (15.4%) 19 (24.7%) 42 (18.8%)
below_poverty_line_count
Mean (SD) 8360 (6490) 7430 (6120) 6180 (6100) 3090 (2900) 5900 (5700)
Median [Min, Max] 7260 [0, 21600] 6880 [0, 28800] 3290 [0, 22800] 2680 [0, 11600] 4220 [0, 28800]
mean_rent
Mean (SD) 1230 (197) 1580 (465) 1960 (674) 1650 (198) 1600 (465)
Median [Min, Max] 1240 [833, 1620] 1450 [792, 3280] 2070 [884, 3270] 1630 [1140, 2250] 1510 [792, 3280]
Missing 8 (18.2%) 9 (14.1%) 6 (15.4%) 19 (24.7%) 42 (18.8%)
unemployment_count
Mean (SD) 1400 (972) 1180 (903) 1210 (1100) 756 (685) 1080 (916)
Median [Min, Max] 1330 [0, 3150] 1120 [0, 3770] 952 [0, 4700] 668 [0, 3150] 919 [0, 4700]
white_count
Mean (SD) 2830 (4990) 13900 (14200) 17200 (14700) 6740 (8500) 9850 (12300)
Median [Min, Max] 919 [0, 27500] 10900 [0, 64500] 13800 [0, 69300] 4200 [0, 43900] 4960 [0, 69300]
latinx_count
Mean (SD) 17300 (12500) 7230 (7660) 10500 (13700) 6990 (8360) 9690 (10900)
Median [Min, Max] 16000 [0, 43600] 4630 [0, 32000] 4480 [0, 60700] 4620 [0, 37600] 5580 [0, 60700]
black_count
Mean (SD) 8380 (8080) 11300 (16000) 5080 (9300) 4370 (8050) 7260 (11400)
Median [Min, Max] 6650 [0, 37600] 2980 [0, 69700] 1370 [0, 48800] 1070 [0, 44500] 2010 [0, 69700]
native_count
Mean (SD) 66.6 (96.8) 51.1 (64.1) 40.6 (59.4) 63.3 (101) 56.5 (84.5)
Median [Min, Max] 27.0 [0, 403] 35.0 [0, 307] 22.0 [0, 303] 13.0 [0, 601] 26.0 [0, 601]
asian_count
Mean (SD) 1100 (1310) 4320 (6610) 4490 (4350) 6640 (8200) 4510 (6530)
Median [Min, Max] 735 [0, 5490] 2510 [0, 41300] 3700 [0, 23600] 4070 [0, 38000] 2260 [0, 41300]
uninsured_count
Mean (SD) 2570 (1990) 2750 (2260) 2030 (2150) 2350 (2510) 2450 (2280)
Median [Min, Max] 2340 [0, 8030] 2610 [0, 10100] 1380 [0, 10300] 1660 [0, 12200] 1910 [0, 12200]
school_count
Mean (SD) 8.64 (7.44) 8.16 (6.79) 8.54 (7.79) 4.08 (2.88) 6.92 (6.41)
Median [Min, Max] 5.50 [1.00, 27.0] 6.00 [1.00, 31.0] 5.00 [1.00, 28.0] 3.00 [1.00, 12.0] 5.00 [1.00, 31.0]
eviction_count
Mean (SD) 438 (340) 245 (234) 223 (233) 126 (131) 238 (255)
Median [Min, Max] 406 [1.00, 1130] 163 [1.00, 829] 152 [1.00, 1120] 93.0 [1.00, 521] 148 [1.00, 1130]
store_count
Mean (SD) 53.2 (38.6) 69.8 (49.5) 58.6 (39.8) 33.1 (33.8) 52.0 (43.1)
Median [Min, Max] 48.5 [1.00, 138] 70.5 [1.00, 202] 54.0 [1.00, 151] 24.0 [1.00, 147] 45.0 [1.00, 202]
transportation_desert_4cat
No Access 5 (11.4%) 9 (14.1%) 2 (5.1%) 40 (51.9%) 56 (25.0%)
Limited Access 21 (47.7%) 23 (35.9%) 6 (15.4%) 29 (37.7%) 79 (35.3%)
Satisfactory Access 14 (31.8%) 24 (37.5%) 13 (33.3%) 8 (10.4%) 59 (26.3%)
Excellent Access 4 (9.1%) 8 (12.5%) 18 (46.2%) 0 (0%) 30 (13.4%)
sub_count
Mean (SD) 1.95 (1.33) 2.77 (2.06) 4.03 (3.53) 1.55 (1.22) 2.41 (2.23)
Median [Min, Max] 1.00 [1.00, 7.00] 2.00 [1.00, 9.00] 3.00 [1.00, 17.0] 1.00 [1.00, 6.00] 1.00 [1.00, 17.0]
bus_count
Mean (SD) 40.1 (22.9) 60.2 (41.1) 46.6 (26.0) 56.9 (45.0) 52.7 (38.0)
Median [Min, Max] 43.5 [1.00, 125] 59.0 [1.00, 170] 44.0 [2.00, 106] 52.0 [1.00, 243] 49.0 [1.00, 243]
mean_ridership
Mean (SD) 7180 (3410) 7400 (4690) 22900 (19500) 10900 (12200) 12000 (13300)
Median [Min, Max] 6670 [2420, 15700] 6610 [1040, 26600] 18000 [5640, 110000] 7920 [273, 55700] 7960 [273, 110000]
Missing 21 (47.7%) 18 (28.1%) 7 (17.9%) 54 (70.1%) 100 (44.6%)
perc_covered_by_transit
Mean (SD) 42.3 (40.8) 50.6 (38.0) 103 (67.2) 14.8 (21.9) 45.7 (50.7)
Median [Min, Max] 31.4 [0, 163] 50.5 [0, 159] 99.7 [0, 254] 0 [0, 87.3] 34.0 [0, 254]

Data Visuals

Non-Spatial

library(ggridges)
plot_1<-nyc_clean %>% 
  ggplot(aes(x=mean_income, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="Mean Income", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))

plot_2<-nyc_clean %>% 
  ggplot(aes(x=below_poverty_line_count, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="Number Below Poverty Line", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))

plot_3<-nyc_clean %>% 
  ggplot(aes(x=mean_rent, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="Mean Rent", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))

plot_4<-nyc_clean %>% 
  ggplot(aes(x=unemployment_count, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="Unemployed Counts", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))

plot_5<-nyc_clean %>% 
  ggplot(aes(x=white_count, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="White Counts", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))

plot_6<-nyc_clean %>% 
  ggplot(aes(x=uninsured_count, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="Uninsured Counts", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))


plot_7<-nyc_clean %>% 
  ggplot(aes(x=school_count, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="School Counts", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))

plot_8<-nyc_clean %>% 
  ggplot(aes(x=eviction_count, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="Eviction Counts", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))

plot_9<-nyc_clean %>% 
  ggplot(aes(x=store_count, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="Food Retail Counts", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))

plot_10<-nyc_clean %>% 
  ggplot(aes(x=borough, fill=transportation_desert_4cat), alpha=.6) +
  geom_bar(position="fill") +
  scale_y_continuous(labels = seq(0, 100, by = 25)) +
  labs(title="Subway Accessibility", y="", x="")+
    theme(panel.grid.major = element_line("transparent"),
         # axis.text.y.left = element_blank(),
          axis.text.x.bottom = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold")) +
   scale_fill_manual(values=c("#a45371","#e5b6c7","#ebebf7","#89a2d1"),
                       guide = guide_legend(title = "Subway Accessibility"), na.value="#D6D6D6") 

plot_11<-nyc_clean %>% 
  ggplot(aes(x=sub_count, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="Subway Stop Counts", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))


plot_12<-nyc_clean %>% 
  ggplot(aes(x=bus_count, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="Bus Stop Counts", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))

plot_13<-nyc_clean %>% 
  ggplot(aes(x=mean_ridership, y=borough, fill=borough), alpha=.6) +
  geom_density_ridges() +
  labs(title="Mean Ridership", y="")+
    theme(panel.grid.major = element_line("transparent"),
          axis.text.y.left = element_text(size = 16, face = "bold"),
          plot.title = element_text(size = 28,hjust=.5, face = "bold"),
          legend.position="none") +
  scale_fill_manual(values=c("#e09f3e","#16bac5","#717ec3","#5da271"))
library(egg)
ggarrange(plot_1, plot_2, plot_3, 
          plot_4, plot_5, plot_6, 
          plot_7, plot_8, plot_9, 
          plot_10, plot_11, plot_12, 
          plot_13, 
          ncol=4)

Spatial

library(egg)
ggarrange(subway_loc, bus_loc, stops, bus_stops, ridership, access, ncol=3)

ggarrange(red, orange, yellow, green, teal, blue, purple, pink, brown, ncol=3)

ggarrange(white, black, latinx, asian, ncol=2)

Model Building

Models 1 & 2: Non-Hierarchical Grouping

Poisson

\[\begin{split} \text{White}_{i} \mid \beta_{0}, \beta_1, ..., \beta_k & \sim \text{Pois}(\lambda_{i}) \; \; \; \; \text{where} \log(\lambda_{i}) = \beta_{0} + \sum^{10}_{k=1}X_{ik}\beta_k \\ \beta_{0c} &\sim N(0,1) \\ \beta_{1} & \sim ~ N(0, 0.00013^2) \\ \beta_{2} & \sim ~ N(0, 0.000073^2) \\ \beta_{3} & \sim ~ N(0, 0.0053^2) \\ \beta_{4} & \sim ~ N(0, 0.0029^2) \\ \beta_{5} & \sim ~ N(0, 5.281^2) \\ \beta_{6} & \sim ~ N(0, 5.375^2) \\ \beta_{7} & \sim ~ N(0, 6.719^2) \\ \beta_{8} & \sim ~ N(0, 0.3905^2) \\ \beta_{9} & \sim ~ N(0,0.0717^2) \\ \beta_{10} & \sim ~ N(0, 0.063^2) \\ \end{split}\]

poisson_non_hierarchical <- stan_glm(
  white_count ~ mean_income + mean_rent + 
    unemployment_count + sub_count + 
    transportation_desert_4cat + school_count + 
    store_count + bus_count + 
    eviction_count + uninsured_count,
  data = nyc_clean,
  family = poisson,
  chains = 2, iter = 100*2, seed = 84735, refresh = 0
)
pp_check(poisson_non_hierarchical) + 
  xlab("White Resident Count") +
  labs(title = "Poisson")+
  theme(plot.title =  element_text(face="bold", size=25, hjust=.5)) 

library(kableExtra)

tidy(poisson_non_hierarchical, effects = "fixed", conf.int = TRUE, conf.level = 0.95) %>% 
  
  mutate(estimate= ifelse(term == "(Intercept)", exp(estimate), (exp(estimate)-1)*100), 
         conf.low= ifelse(term == "(Intercept)", exp(conf.low), (exp(conf.low)-1)*100), 
         conf.high = ifelse(term == "(Intercept)", exp(conf.high), (exp(conf.high)-1)*100))%>%
  filter(conf.low   > 0 & conf.high > 0 | conf.low  < 0 & conf.high < 0) %>%
  kable(align = "c", caption = "Non-Hierarchical Poisson - Model Summary") %>% 
  kable_styling()
Non-Hierarchical Poisson - Model Summary
term estimate std.error conf.low conf.high
(Intercept) 2673.9230456 0.0047997 2648.1986800 2698.2583289
mean_income 0.0010982 0.0000000 0.0010878 0.0011079
mean_rent -0.0185651 0.0000036 -0.0192143 -0.0178724
unemployment_count 0.0294420 0.0000017 0.0291006 0.0298129
sub_count -5.8733585 0.0003365 -5.9328742 -5.8042892
transportation_desert_4catLimited Access 93.8149175 0.0028888 92.7116596 94.9803540
transportation_desert_4catSatisfactory Access 80.7154971 0.0033672 79.4632842 81.8809950
transportation_desert_4catExcellent Access 107.9553205 0.0037653 106.2443491 109.6271776
school_count 0.8084527 0.0001484 0.7798436 0.8400224
store_count 1.2401740 0.0000306 1.2342956 1.2462133
bus_count 0.3800261 0.0000290 0.3738134 0.3853999
eviction_count -0.3209642 0.0000065 -0.3222356 -0.3198271
uninsured_count -0.0065271 0.0000005 -0.0066275 -0.0064293
nyc_predict_clean <- nyc_clean %>%
  na.omit()
set.seed(84735)

predictions_poisson <-  posterior_predict(
  poisson_non_hierarchical, newdata = nyc_predict_clean)

library(tidybayes)
library(bayesplot)

ppc_intervals(nyc_predict_clean$white_count, yrep = predictions_poisson,
              prob_outer = 0.95) +
  ggplot2::scale_x_continuous(
    labels = nyc_predict_clean$nta_id,
    breaks = 1:nrow(nyc_predict_clean)) +
  xaxis_text(angle = 90, hjust = 1) +
  theme_linedraw()+
  theme(panel.grid.major = element_line("transparent"),
        axis.title.y = element_blank(),
        axis.text.y = element_blank())+
  coord_flip() 

Negative Binomial

\[\begin{split} \text{White}_{ij} \mid \beta_{0j}, \beta_1, ..., \beta_k, \sigma_y & \sim \text{NegBin}(\mu_{ij}, r) \; \; \; \; \text{where} \log(\lambda_{i}) = \beta_{0} + \sum^{10}_{k=1}X_{ik}\beta_k \\ \beta_{0c} &\sim N(0,1) \\ \beta_{1} & \sim ~ N(0, 0.00013^2) \\ \beta_{2} & \sim ~ N(0, 0.000073^2) \\ \beta_{3} & \sim ~ N(0, 0.0053^2) \\ \beta_{4} & \sim ~ N(0, 0.0029^2) \\ \beta_{5} & \sim ~ N(0, 5.281^2) \\ \beta_{6} & \sim ~ N(0, 5.375^2) \\ \beta_{7} & \sim ~ N(0, 6.719^2) \\ \beta_{8} & \sim ~ N(0, 0.3905^2) \\ \beta_{9} & \sim ~ N(0,0.0717^2) \\ \beta_{10} & \sim ~ N(0, 0.063^2) \\ \end{split}\]

negbin_non_hierarchical <- stan_glm(
  white_count ~  mean_income + mean_rent + 
    unemployment_count + sub_count + 
    transportation_desert_4cat + school_count + 
    store_count + bus_count + 
    eviction_count + uninsured_count,
  data = nyc_clean,
  family = neg_binomial_2,
  chains = 2, iter = 100*2, seed = 84735, refresh = 0
)
pp_check(negbin_non_hierarchical) + 
  xlab("White Resident Count") +
  xlim(0,100000)+
  labs(title = "Negative Binomial")+
  theme(plot.title =  element_text(face="bold", size=25, hjust=.5)) 

tidy(negbin_non_hierarchical, effects = "fixed", conf.int = TRUE, conf.level = 0.95)%>% 
  
  mutate(estimate= ifelse(term == "(Intercept)", exp(estimate), (exp(estimate)-1)*100), 
         conf.low= ifelse(term == "(Intercept)", exp(conf.low), (exp(conf.low)-1)*100), 
         conf.high = ifelse(term == "(Intercept)", exp(conf.high), (exp(conf.high)-1)*100))%>%
  filter(conf.low   > 0 & conf.high > 0 | conf.low  < 0 & conf.high < 0) %>%
  kable(align = "c", caption = "Non-Hierarchichal Negative Binomial - Model Summary") %>% 
  kable_styling()
Non-Hierarchichal Negative Binomial - Model Summary
term estimate std.error conf.low conf.high
(Intercept) 1543.7182271 0.4090195 574.4689288 3177.8562354
mean_income 0.0013119 0.0000065 0.0001697 0.0028603
unemployment_count 0.0449154 0.0001464 0.0067808 0.0755681
transportation_desert_4catLimited Access 117.1106252 0.2295123 41.8845016 246.0971086
transportation_desert_4catSatisfactory Access 127.0057951 0.2432396 40.8460810 269.8539369
transportation_desert_4catExcellent Access 105.0543669 0.3250979 10.3982122 298.1079735
store_count 0.9651381 0.0033081 0.2246905 1.5644025
bus_count 0.6629433 0.0034553 0.0949130 1.2857114
eviction_count -0.3253923 0.0004539 -0.3981334 -0.2337496
nyc_predict_clean <- nyc_clean %>%
  na.omit()
set.seed(84735)

predictions_negbin <-  posterior_predict(
  negbin_non_hierarchical, newdata = nyc_predict_clean)

ppc_intervals(nyc_predict_clean$white_count, yrep = predictions_negbin,
              prob_outer = 0.95) +
  ggplot2::scale_x_continuous(
    labels = nyc_predict_clean$nta_id,
    breaks = 1:nrow(nyc_predict_clean)) +
    xaxis_text(angle = 90,  hjust = 1) +
  theme_linedraw()+
  theme(panel.grid.major = element_line("transparent"),
        axis.title.y = element_blank(),
        axis.text.y = element_blank())+
  coord_flip()

Models 2 & 3: Hierarchy by Borough

Poisson

\[\begin{split} \text{White}_{ij} \mid \beta_{0j}, \beta_1, ..., \beta_k, \sigma_y & \sim \text{Pois}(\lambda_{ij}) \\ & \text{where} \log(\lambda_{ij}) = \beta_{0j} + \sum^{11}_{k=1}X_{ijk}\beta_k \\ \beta_{0j} \mid \beta_0, \sigma_0 & \stackrel{ind}{\sim} N(\beta_0, \sigma_0^2)\\ \beta_{0c}, \beta_k & \stackrel{ind}{\sim} N(0,1) \\ \sigma_y, \sigma_0 & \stackrel{ind}{\sim} Exp(1) \end{split}\]

poisson_hierarchical <- stan_glmer(
 white_count ~ mean_income + mean_rent + 
    unemployment_count + sub_count + 
    transportation_desert_4cat + school_count + 
    store_count + bus_count + 
    eviction_count + uninsured_count + (1 | borough),
  data = nyc_clean,
  family = poisson,
  chains = 2, iter = 100*2, seed = 84735, refresh = 0
)
pp_check(poisson_hierarchical) + 
  xlab("White Resident Count") +
  labs(title = "Poisson")+
  theme(plot.title =  element_text(face="bold", size=25, hjust=.5)) 

library(kableExtra)

tidy(poisson_hierarchical, effects = "fixed", conf.int = TRUE, conf.level = 0.95) %>% 
  
  mutate(estimate= ifelse(term == "(Intercept)", exp(estimate), (exp(estimate)-1)*100), 
         conf.low= ifelse(term == "(Intercept)", exp(conf.low), (exp(conf.low)-1)*100), 
         conf.high = ifelse(term == "(Intercept)", exp(conf.high), (exp(conf.high)-1)*100))%>%
  filter(conf.low   > 0 & conf.high > 0 | conf.low  < 0 & conf.high < 0) %>%
  kable(align = "c", caption = "Hierarchicahl Poisson - Model Summary") %>% 
  kable_styling()
Hierarchicahl Poisson - Model Summary
term estimate std.error conf.low conf.high
(Intercept) 1069.1723933 2.1861898 0.0147505 5027.2255154
mean_income 0.0008689 0.0000000 0.0007817 0.0009978
mean_rent -0.0187935 0.0000038 -0.0193944 -0.0088237
unemployment_count 0.0278937 0.0000019 0.0274688 0.0341182
sub_count -5.0478586 0.0004792 -6.9354075 -4.9657554
transportation_desert_4catLimited Access 84.6880605 0.0036811 79.0954805 106.2907635
transportation_desert_4catSatisfactory Access 55.7547128 0.0039514 52.6292018 83.3116846
transportation_desert_4catExcellent Access 64.3117599 0.0048316 61.3938444 113.7529792
school_count 0.5183774 0.0001783 0.4756261 0.8698601
store_count 1.0340819 0.0000397 1.0103805 1.5690242
bus_count 0.5169462 0.0000314 0.4856269 0.5659028
eviction_count -0.3156635 0.0000091 -0.4238896 -0.3142123
uninsured_count -0.0049824 0.0000008 -0.0065110 -0.0045559
nyc_predict_clean <- nyc_clean %>%
  na.omit()
set.seed(84735)

predictions_poisson <-  posterior_predict(
  poisson_hierarchical, newdata = nyc_predict_clean)

library(tidybayes)
library(bayesplot)

ppc_intervals(nyc_predict_clean$white_count, yrep = predictions_poisson,
              prob_outer = 0.95) +
  ggplot2::scale_x_continuous(
    labels = nyc_predict_clean$nta_id,
    breaks = 1:nrow(nyc_predict_clean)) +
  xaxis_text(angle = 90, hjust = 1) +
  theme_linedraw()+
  theme(panel.grid.major = element_line("transparent"),
        axis.title.y = element_blank(),
        axis.text.y = element_blank())+
  coord_flip() 

Negative Binomial

\[\begin{split} \text{White}_{ij} \mid \beta_{0j}, \beta_1, ..., \beta_k, \sigma_y & \sim \text{NegBin}(\mu_{ij}, r) \\ & \text{where} \log(\mu_{ij}) = \beta_{0j} + \sum^{11}_{k=1}X_{ijk}\beta_k \\ \beta_{0j} \mid \beta_0, \sigma_0 & \stackrel{ind}{\sim} N(\beta_0, \sigma_0^2)\\ \beta_{0c}, \beta_k & \stackrel{ind}{\sim} N(0,1) \\ \sigma_y, \sigma_0 & \stackrel{ind}{\sim} Exp(1) \end{split}\]

negbin_hierarchical <- stan_glmer(
 white_count ~ mean_income + mean_rent + 
    unemployment_count + sub_count + 
    transportation_desert_4cat + school_count + 
    store_count + bus_count + 
    eviction_count + uninsured_count + (1 | borough),
  data = nyc_clean,
  family = neg_binomial_2,
  chains = 2, iter = 100*2, seed = 84735, refresh = 0)
pp_check(negbin_hierarchical) + 
  xlab("White Resident Count") +
  labs(title = "Negative Binomial")+
  theme(plot.title =  element_text(face="bold", size=25, hjust=.5)) 

tidy(negbin_hierarchical, effects = "fixed", conf.int = TRUE, conf.level = 0.95)%>% 
  
  mutate(estimate= ifelse(term == "(Intercept)", exp(estimate), (exp(estimate)-1)*100), 
         conf.low= ifelse(term == "(Intercept)", exp(conf.low), (exp(conf.low)-1)*100), 
         conf.high = ifelse(term == "(Intercept)", exp(conf.high), (exp(conf.high)-1)*100))%>%
  filter(conf.low   > 0 & conf.high > 0 | conf.low  < 0 & conf.high < 0) %>%
  kable(align = "c", caption = "Hierarchichal Negative Binomial - Model Summary") %>% 
  kable_styling()
Hierarchichal Negative Binomial - Model Summary
term estimate std.error conf.low conf.high
(Intercept) 1464.5798433 0.5871966 582.2935447 4828.0873124
mean_income 0.0013683 0.0000071 0.0001839 0.0026254
unemployment_count 0.0394118 0.0001816 0.0042854 0.0772274
transportation_desert_4catLimited Access 121.5471553 0.2169129 39.4496677 272.4318557
transportation_desert_4catSatisfactory Access 111.9518341 0.2723560 24.7854200 268.7633079
store_count 0.7966206 0.0032517 0.0813384 1.5285805
bus_count 0.6047094 0.0028784 0.1039942 1.1975253
eviction_count -0.2781839 0.0005468 -0.3893503 -0.1903382
nyc_predict_clean <- nyc_clean %>%
  na.omit()
set.seed(84735)

predictions_negbin <-  posterior_predict(
  negbin_hierarchical, newdata = nyc_predict_clean)

ppc_intervals(nyc_predict_clean$white_count, yrep = predictions_negbin,
              prob_outer = 0.95) +
  ggplot2::scale_x_continuous(
    labels = nyc_predict_clean$nta_id,
    breaks = 1:nrow(nyc_predict_clean)) +
    xaxis_text(angle = 90,  hjust = 1) +
  theme_linedraw()+
  theme(panel.grid.major = element_line("transparent"),
        axis.title.y = element_blank(),
        axis.text.y = element_blank())+
  coord_flip()

Next Steps

We intend to adjust our model specifications and reconsider the construction and inclusion of our predictors. For example, we may want to rethink how we’re making our transit-access variable, whether including car ownership or transit use, and what predictors we’re specifically using in our models.

Moving forward, we want to build some non-hierarchical models predicting subway and bus count using the demographic data (like median income, rent, percentage white population, etc) to see the flip-side of what we are looking at right now with our models above.

Our current “hierarchical models” attempt to capture a spatial interaction, but are not properly set up. So, we will include latitude and longitude variables into our model to account for spatial relationships and perhaps do away with hierarchy. However, we may introduce a borough-wide level of grouping or use CARBayes to adjust for the spatial factors.

Participation

  • Sam Ding: Pulled and prepared subway ridership and access data, then harmonized transit data with existing neighborhood data.
  • Vichy Meas: Helped clean both transit and demographic data, scaffolded the checkpoint 3 responses, and helped plan out transit questions.
  • Freddy Barragan: Pulled and prepared grocery, bus stop, school, eviction, and census data, made exploratory visualizations by neighborhood, built/coded our proposed demographic models.
  • Juthi Dewan: Harmonized, cleaned, and joined all disparate datasets, prepared our data summaries, built/coded our proposed demographic models.